Loading libraries

dyn.load("/home/moyra.lawrence/miniconda3/envs/monocle3/lib/libgeos_c.so.1")
library(Seurat)
## Warning in fun(libname, pkgname): rgeos: versions of GEOS runtime 3.11.0-CAPI-1.17.0
## and GEOS at installation 3.8.0-CAPI-1.13.1differ
library(patchwork)
library(ggplot2)
library(gridExtra)
library(openxlsx)
library(svglite)
library(dplyr)
library(gt)
library(tidyverse)
library(stringi)

#Loading embryo data and preparing for seurat

raw_data <- data.table::fread("/home/moyra.lawrence/R_analysis/Second_10x_run/GSE45719_matrix.txt.gz", data.table = FALSE)

names(raw_data)[1] <- 'Gene'
names <- make.unique(raw_data$Gene)
rownames(raw_data) <- names
raw_data <- raw_data[,-1] # get rid of old names

ML11 <- CreateSeuratObject(counts = raw_data, project = "ML11")
## Warning: Feature names cannot have pipe characters ('|'), replacing with dashes
## ('-')

Checking filtering of embryo data

VlnPlot(ML11, pt.size=0, features = c("nFeature_RNA", "nCount_RNA"), ncol = 3)
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
VlnPlot(ML11, pt.size=0, features = c("nFeature_RNA", "nCount_RNA"), ncol = 3, log=TRUE)
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
FeatureScatter(ML11, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")

#Quick investigation of markers

ML11 <- FindVariableFeatures(ML11, selection.method = "vst", nfeatures = 3000)
top10 <- head(VariableFeatures(ML11), 10)
plot1 <- VariableFeaturePlot(ML11)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE, xnudge=0, ynudge=0)
plot2
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 1133 rows containing missing values (geom_point).
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

### Scaling

all.genes <- rownames(ML11)
ML11 <- ScaleData(ML11, features = all.genes)

Normalisation

scale.factor = 10000

ML11 <- NormalizeData(ML11, normalization.method = "LogNormalize", scale.factor = 10000)

PCA of embryo data

ML11 <- RunPCA(ML11, npcs=100, features = VariableFeatures(object = ML11))

print(ML11[["pca"]], dims = 1:6, nfeatures = 5)
VizDimLoadings(ML11, dims = 1:6, reduction = "pca")
DimPlot(ML11, reduction = "pca")
DimHeatmap(ML11, dims = 1:6, cells = 500, balanced = TRUE)
ElbowPlot(ML11, ndims=100)

## UMAP resolution = 0.7 for ML11

#0.4-1.2 for resolution for 3000 cells, according to seurat tutorial
ML11 <- FindNeighbors(ML11, dims = 1:50)
ML11 <- FindClusters(ML11, resolution = 1)
ML11 <- RunUMAP(ML11, dims = 1:50)
DimPlot(ML11, reduction = "umap")

tbl_cell <- table(ML11@active.ident, ML11@meta.data$orig.ident)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add
##       16cell     4cell      8cell        BXC C57twocell early2cell earlyblast
## 1 23 (39.7%) 14 (100%) 24 (51.1%)     0 (0%)     0 (0%)     0 (0%)     0 (0%)
## 2     0 (0%)    0 (0%)     0 (0%)     0 (0%)     0 (0%)     0 (0%) 18 (41.9%)
## 3     0 (0%)    0 (0%)   1 (2.1%)     0 (0%)     0 (0%)     0 (0%) 21 (48.8%)
## 4   1 (1.7%)    0 (0%)     0 (0%)  3 (23.1%)   8 (100%)   8 (100%)     0 (0%)
## 5 26 (44.8%)    0 (0%) 14 (29.8%)     0 (0%)     0 (0%)     0 (0%)     0 (0%)
## 6  8 (13.8%)    0 (0%)    8 (17%) 10 (76.9%)     0 (0%)     0 (0%)   4 (9.3%)
##   fibroblast late2cell lateblast  mid2cell   midblast      zy1      zy2
## 1     0 (0%)    0 (0%)    0 (0%)   3 (25%)     0 (0%)   0 (0%)   0 (0%)
## 2    5 (50%)    0 (0%)  18 (60%)    0 (0%) 22 (36.7%)   0 (0%)   0 (0%)
## 3     0 (0%)    0 (0%) 8 (26.7%)    0 (0%) 31 (51.7%)   0 (0%)   0 (0%)
## 4    5 (50%) 10 (100%)    0 (0%) 7 (58.3%)     0 (0%) 1 (100%) 1 (100%)
## 5     0 (0%)    0 (0%)    0 (0%) 2 (16.7%)     0 (0%)   0 (0%)   0 (0%)
## 6     0 (0%)    0 (0%) 4 (13.3%)    0 (0%)  7 (11.7%)   0 (0%)   0 (0%)
##        zy3      zy4 rownames
## 1   0 (0%)   0 (0%)        0
## 2   0 (0%)   0 (0%)        1
## 3   0 (0%)   0 (0%)        2
## 4 1 (100%) 1 (100%)        3
## 5   0 (0%)   0 (0%)        4
## 6   0 (0%)   0 (0%)        5

Finding cluster markers

mus.markers <- FindAllMarkers(ML11, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
write.xlsx(mus.markers, file = 'ML11.celltype.allmarkers.xlsx')
mus.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)
top10
##  [1] "LOC100502936"  "Bcl2l10"       "E330034G19Rik" "Zscan5b"      
##  [5] "Gm1995"        "Oas1d"         "S100a4"        "Tcl1"         
##  [9] "Zfp707"        "Serpinf1"
save(ML11, file="GSE_dataset.rda")
load(file='GSE_dataset.rda')

#Plotting seurat totipotent cluster markers

load(file='GSE_dataset.rda')
seurat <- c("Cacna1s", "Cenpe", "Dppa2", "Dppa5a", "Dusp4", "Gm2056", "Gm5788", "Gm2035", "Gm5039", "Gm16368", "Gm21319", "Gm2016", "Gm2022", "Gm4027", "Gm5662", "Gm8300", "Eif4a2", "Fbxo34", "Nelfa", "Nub1", "Olfr374", "Platr31", "Polr2a", "B020004J07Rik", "Gm12800", "Gm12794", "BC080695", "Gm13119", "Rbbp6", "Rpl10l", "Rpl12", "Rpl39l", "Rplp0", "Rxra", "Sp110", "Sp140", "Ube2c", "Ube2s", "Ube2t", "Ube2w", "Uimc1", "Usp17la", "Usp17lc", "Usp17le", "Usp26", "Usp9y", "Ythdc1", "Zfp296", "Zfp352", "Zfp51", "Zfp516", "Zfp560", "Zfp809", "Zfp820", "Zfp936", "Zfp950", "Zfp980", "Zfp981", "Zfp988", "Zfp992", "Zscan4c", "Zscan4d", "Zscan4e", "Zscan4-ps1", "Zscan4-ps2", "Zscan4-ps3") 
seurat2<- c("Kdm1a", "Hdac1", "Hdac2", "Rcor1", "Sumo2", "Zmym2", "Clec10a", "Zfp42", "Arg2")
levels(ML11$orig.ident)
##  [1] "16cell"     "4cell"      "8cell"      "BXC"        "C57twocell"
##  [6] "early2cell" "earlyblast" "fibroblast" "late2cell"  "lateblast" 
## [11] "mid2cell"   "midblast"   "zy1"        "zy2"        "zy3"       
## [16] "zy4"
ML11$orig.ident<- factor(x = ML11$orig.ident, levels = c("zy1", "zy2", "zy3", "zy4", "C57twocell", "early2cell", "mid2cell", "late2cell", "4cell", "8cell", "16cell", "earlyblast", "midblast", "lateblast", "fibroblast", "BXC"))
DoHeatmap(ML11, features = seurat, group.by = "orig.ident") + theme(axis.text.y = element_text(size = 6))
## Warning in DoHeatmap(ML11, features = seurat, group.by = "orig.ident"): The
## following features were omitted as they were not found in the scale.data slot
## for the RNA assay: Zscan4-ps3, Zscan4-ps2, Zscan4-ps1, Zfp992, Zfp988, Zfp981,
## Zfp980, Zfp950, Usp17le, Usp17lc, Usp17la, Gm12800, Platr31, Nelfa, Gm21319,
## Gm16368, Gm2035, Gm5788, Gm2056

DoHeatmap(ML11, features = seurat2,group.by = "orig.ident") + theme(axis.text.y = element_text(size = 6))

Reloading embryo data and preparing for seurat

raw_data <- data.table::fread("/home/moyra.lawrence/R_analysis/Second_10x_run/GSE45719_matrix.txt.gz", data.table = FALSE)

names(raw_data)[1] <- 'Gene'
names <- make.unique(raw_data$Gene)
rownames(raw_data) <- names
raw_data <- raw_data[,-1] # get rid of old names

colnames(raw_data) <- gsub("_", "-", colnames(raw_data))
colnames(raw_data) <- stri_replace_all_fixed(str = colnames(raw_data), pattern = "_", replacement = "-")
colnames(raw_data) <- stri_replace_all_fixed(str = colnames(raw_data), pattern = "zy1", replacement = "zygote-1")
colnames(raw_data) <- stri_replace_all_fixed(str = colnames(raw_data), pattern = "zy2", replacement = "zygote-2")
colnames(raw_data) <- stri_replace_all_fixed(str = colnames(raw_data), pattern = "zy3", replacement = "zygote-3")
colnames(raw_data) <- stri_replace_all_fixed(str = colnames(raw_data), pattern = "zy4", replacement = "zygote-4")

ML11 <- CreateSeuratObject(counts=raw_data, project="ML11", names.delim="-")
## Warning: Feature names cannot have pipe characters ('|'), replacing with dashes
## ('-')
ML11$orig.ident
##            16cell-1-10            16cell-1-11            16cell-1-12 
##                 16cell                 16cell                 16cell 
##            16cell-1-13            16cell-1-14            16cell-1-15 
##                 16cell                 16cell                 16cell 
##             16cell-1-2             16cell-1-3             16cell-1-4 
##                 16cell                 16cell                 16cell 
##             16cell-1-5             16cell-1-6             16cell-1-7 
##                 16cell                 16cell                 16cell 
##             16cell-1-8             16cell-1-9             16cell-4-1 
##                 16cell                 16cell                 16cell 
##            16cell-4-10            16cell-4-11             16cell-4-2 
##                 16cell                 16cell                 16cell 
##             16cell-4-3             16cell-4-4             16cell-4-5 
##                 16cell                 16cell                 16cell 
##             16cell-4-6             16cell-4-7             16cell-4-8 
##                 16cell                 16cell                 16cell 
##             16cell-4-9             16cell-5-1            16cell-5-10 
##                 16cell                 16cell                 16cell 
##            16cell-5-11            16cell-5-12            16cell-5-13 
##                 16cell                 16cell                 16cell 
##             16cell-5-2             16cell-5-3             16cell-5-4 
##                 16cell                 16cell                 16cell 
##             16cell-5-5             16cell-5-6             16cell-5-7 
##                 16cell                 16cell                 16cell 
##             16cell-5-8             16cell-5-9             16cell-6-1 
##                 16cell                 16cell                 16cell 
##            16cell-6-10            16cell-6-11            16cell-6-12 
##                 16cell                 16cell                 16cell 
##             16cell-6-2             16cell-6-3             16cell-6-4 
##                 16cell                 16cell                 16cell 
##             16cell-6-5             16cell-6-6             16cell-6-7 
##                 16cell                 16cell                 16cell 
##             16cell-6-8             16cell-6-9              4cell-1-1 
##                 16cell                 16cell                  4cell 
##              4cell-1-2              4cell-1-4              4cell-2-1 
##                  4cell                  4cell                  4cell 
##              4cell-2-2              4cell-2-3              4cell-2-4 
##                  4cell                  4cell                  4cell 
##              4cell-3-1              4cell-3-3              4cell-3-4 
##                  4cell                  4cell                  4cell 
##              4cell-4-1              4cell-4-2              4cell-4-3 
##                  4cell                  4cell                  4cell 
##              4cell-4-4              8cell-1-1              8cell-1-2 
##                  4cell                  8cell                  8cell 
##              8cell-1-4              8cell-1-5              8cell-1-6 
##                  8cell                  8cell                  8cell 
##              8cell-1-7              8cell-1-8              8cell-2-1 
##                  8cell                  8cell                  8cell 
##              8cell-2-2              8cell-2-3              8cell-2-4 
##                  8cell                  8cell                  8cell 
##              8cell-2-6              8cell-2-7              8cell-2-8 
##                  8cell                  8cell                  8cell 
##              8cell-5-1              8cell-5-2              8cell-5-3 
##                  8cell                  8cell                  8cell 
##              8cell-5-4              8cell-5-6              8cell-5-7 
##                  8cell                  8cell                  8cell 
##              8cell-5-8              8cell-8-1              8cell-8-2 
##                  8cell                  8cell                  8cell 
##              8cell-8-3              8cell-8-4              8cell-8-6 
##                  8cell                  8cell                  8cell 
##              8cell-8-7              8cell-8-8  BXC-100pg-liver-RNA-1 
##                  8cell                  8cell                    BXC 
##  BXC-100pg-liver-RNA-2   BXC-10pg-liver-RNA-1   BXC-10pg-liver-RNA-2 
##                    BXC                    BXC                    BXC 
##   BXC-1ng-liver-RNA-0r    BXC-1ng-liver-RNA-1  BXC-30pg-liver-RNA-0r 
##                    BXC                    BXC                    BXC 
##   BXC-30pg-liver-RNA-2       BXC-liver-cell-1       BXC-liver-cell-2 
##                    BXC                    BXC                    BXC 
##       BXC-liver-cell-4       BXC-liver-cell-5       BXC-liver-cell-6 
##                    BXC                    BXC                    BXC 
##        C57twocell-16-1        C57twocell-16-2        C57twocell-18-1 
##             C57twocell             C57twocell             C57twocell 
##        C57twocell-18-2        C57twocell-20-1        C57twocell-20-2 
##             C57twocell             C57twocell             C57twocell 
##        C57twocell-22-1        C57twocell-22-2        early2cell-0r-1 
##             C57twocell             C57twocell             early2cell 
##        early2cell-0r-2         early2cell-1-1         early2cell-1-2 
##             early2cell             early2cell             early2cell 
##         early2cell-2-1         early2cell-2-2         early2cell-3-1 
##             early2cell             early2cell             early2cell 
##         early2cell-3-2         earlyblast-2-1        earlyblast-2-10 
##             early2cell             earlyblast             earlyblast 
##        earlyblast-2-12        earlyblast-2-15        earlyblast-2-16 
##             earlyblast             earlyblast             earlyblast 
##        earlyblast-2-17        earlyblast-2-19         earlyblast-2-2 
##             earlyblast             earlyblast             earlyblast 
##        earlyblast-2-22         earlyblast-2-3         earlyblast-2-4 
##             earlyblast             earlyblast             earlyblast 
##         earlyblast-2-5         earlyblast-2-6         earlyblast-2-7 
##             earlyblast             earlyblast             earlyblast 
##         earlyblast-2-8         earlyblast-3-1        earlyblast-3-10 
##             earlyblast             earlyblast             earlyblast 
##        earlyblast-3-12        earlyblast-3-13        earlyblast-3-14 
##             earlyblast             earlyblast             earlyblast 
##        earlyblast-3-15        earlyblast-3-16        earlyblast-3-17 
##             earlyblast             earlyblast             earlyblast 
##         earlyblast-3-2         earlyblast-3-3         earlyblast-3-4 
##             earlyblast             earlyblast             earlyblast 
##         earlyblast-3-6         earlyblast-3-7         earlyblast-3-8 
##             earlyblast             earlyblast             earlyblast 
##         earlyblast-3-9         earlyblast-4-1        earlyblast-4-12 
##             earlyblast             earlyblast             earlyblast 
##        earlyblast-4-13        earlyblast-4-14        earlyblast-4-16 
##             earlyblast             earlyblast             earlyblast 
##        earlyblast-4-17        earlyblast-4-18         earlyblast-4-2 
##             earlyblast             earlyblast             earlyblast 
##         earlyblast-4-3         earlyblast-4-5         earlyblast-4-6 
##             earlyblast             earlyblast             earlyblast 
##         earlyblast-4-8         earlyblast-4-9          late2cell-5-1 
##             earlyblast             earlyblast              late2cell 
##          late2cell-5-2          late2cell-6-1          late2cell-6-2 
##              late2cell              late2cell              late2cell 
##          late2cell-7-1          late2cell-7-2          late2cell-8-1 
##              late2cell              late2cell              late2cell 
##          late2cell-8-2          late2cell-9-1          late2cell-9-2 
##              late2cell              late2cell              late2cell 
##         lateblast-1-10         lateblast-1-11         lateblast-1-13 
##              lateblast              lateblast              lateblast 
##         lateblast-1-14         lateblast-1-16         lateblast-1-19 
##              lateblast              lateblast              lateblast 
##          lateblast-1-2         lateblast-1-20         lateblast-1-21 
##              lateblast              lateblast              lateblast 
##         lateblast-1-23         lateblast-1-24         lateblast-1-26 
##              lateblast              lateblast              lateblast 
##         lateblast-1-27          lateblast-1-4          lateblast-1-5 
##              lateblast              lateblast              lateblast 
##          lateblast-1-6          lateblast-1-7          lateblast-1-8 
##              lateblast              lateblast              lateblast 
##          lateblast-1-9          lateblast-2-1         lateblast-2-12 
##              lateblast              lateblast              lateblast 
##         lateblast-2-14         lateblast-2-16         lateblast-2-17 
##              lateblast              lateblast              lateblast 
##          lateblast-2-2          lateblast-2-3          lateblast-2-5 
##              lateblast              lateblast              lateblast 
##          lateblast-2-7          lateblast-2-8          lateblast-2-9 
##              lateblast              lateblast              lateblast 
##          mid2cell-0r-1          mid2cell-0r-2           mid2cell-3-1 
##               mid2cell               mid2cell               mid2cell 
##           mid2cell-3-2           mid2cell-4-1           mid2cell-4-2 
##               mid2cell               mid2cell               mid2cell 
##           mid2cell-5-1           mid2cell-5-2           mid2cell-6-1 
##               mid2cell               mid2cell               mid2cell 
##           mid2cell-6-2           mid2cell-7-1           mid2cell-7-2 
##               mid2cell               mid2cell               mid2cell 
##           midblast-1-1          midblast-1-10          midblast-1-11 
##               midblast               midblast               midblast 
##          midblast-1-12          midblast-1-13          midblast-1-14 
##               midblast               midblast               midblast 
##          midblast-1-15          midblast-1-16          midblast-1-17 
##               midblast               midblast               midblast 
##          midblast-1-18          midblast-1-19           midblast-1-2 
##               midblast               midblast               midblast 
##          midblast-1-20          midblast-1-22          midblast-1-23 
##               midblast               midblast               midblast 
##          midblast-1-24           midblast-1-3           midblast-1-4 
##               midblast               midblast               midblast 
##           midblast-1-5           midblast-1-6           midblast-1-8 
##               midblast               midblast               midblast 
##           midblast-1-9           midblast-2-1          midblast-2-10 
##               midblast               midblast               midblast 
##          midblast-2-11          midblast-2-12          midblast-2-13 
##               midblast               midblast               midblast 
##          midblast-2-14          midblast-2-15          midblast-2-16 
##               midblast               midblast               midblast 
##          midblast-2-17          midblast-2-18           midblast-2-2 
##               midblast               midblast               midblast 
##          midblast-2-23          midblast-2-24           midblast-2-3 
##               midblast               midblast               midblast 
##           midblast-2-4           midblast-2-5           midblast-2-6 
##               midblast               midblast               midblast 
##           midblast-2-7           midblast-2-8           midblast-2-9 
##               midblast               midblast               midblast 
##           midblast-3-1          midblast-3-10          midblast-3-11 
##               midblast               midblast               midblast 
##          midblast-3-12          midblast-3-13          midblast-3-14 
##               midblast               midblast               midblast 
##          midblast-3-15          midblast-3-17          midblast-3-18 
##               midblast               midblast               midblast 
##           midblast-3-2          midblast-3-23           midblast-3-3 
##               midblast               midblast               midblast 
##           midblast-3-4           midblast-3-5           midblast-3-6 
##               midblast               midblast               midblast 
##           midblast-3-7           midblast-3-8           midblast-3-9 
##               midblast               midblast               midblast 
##               zygote-1               zygote-2               zygote-3 
##                 zygote                 zygote                 zygote 
##               zygote-4 16cell-2pooled-split6a 16cell-2pooled-split6b 
##                 zygote                 16cell                 16cell 
## 16cell-2pooled-split8a 16cell-2pooled-split8b         16cell-split1a 
##                 16cell                 16cell                 16cell 
##         16cell-split1b         16cell-split2a         16cell-split2b 
##                 16cell                 16cell                 16cell 
##   8cell-12-1-smartseq2   8cell-12-2-smartseq2   8cell-12-3-smartseq2 
##                  8cell                  8cell                  8cell 
##   8cell-12-4-smartseq2   8cell-13-1-smartseq2   8cell-14-1-smartseq2 
##                  8cell                  8cell                  8cell 
##   8cell-14-2-smartseq2   8cell-14-3-smartseq2   8cell-14-4-smartseq2 
##                  8cell                  8cell                  8cell 
##  8cell-2pooled-split5a  8cell-2pooled-split5b  8cell-2pooled-split7a 
##                  8cell                  8cell                  8cell 
##  8cell-2pooled-split7b  8cell-2pooled-split9a  8cell-2pooled-split9b 
##                  8cell                  8cell                  8cell 
##          8cell-split3a          8cell-split3b          8cell-split4a 
##                  8cell                  8cell                  8cell 
##          8cell-split4b      fibroblast-13-CxB      fibroblast-14-CxB 
##                  8cell             fibroblast             fibroblast 
##      fibroblast-15-CxB      fibroblast-16-CxB      fibroblast-17-BxC 
##             fibroblast             fibroblast             fibroblast 
##      fibroblast-19-BxC      fibroblast-20-BxC      fibroblast-21-BxC 
##             fibroblast             fibroblast             fibroblast 
##      fibroblast-22-BxC       fibroblast-9-CxB 
##             fibroblast             fibroblast 
## 13 Levels: 16cell 4cell 8cell BXC C57twocell early2cell ... zygote
ML11[["in.vivo"]] <- Idents(object = ML11)
ML11$orig.ident <- "ML11"

Loading data

ML1.data <- Read10X(data.dir = "/home/moyra.lawrence/count/Empty_vector3/outs/filtered_feature_bc_matrix/")
ML2.data <- Read10X(data.dir = "/home/moyra.lawrence/count/Dppa2/outs/filtered_feature_bc_matrix/")
ML3.data <- Read10X(data.dir = "/home/moyra.lawrence/count/Gata3/outs/filtered_feature_bc_matrix/")
ML4.data <- Read10X(data.dir = "/home/moyra.lawrence/count/MERVL_VP64/outs/filtered_feature_bc_matrix/")
ML5.data <- Read10X(data.dir = "/home/moyra.lawrence/count2/Empty_puro/outs/filtered_feature_bc_matrix/")
ML6.data <- Read10X(data.dir = "/home/moyra.lawrence/count2/Empty_hygro/outs/filtered_feature_bc_matrix/")
ML7.data <- Read10X(data.dir = "/home/moyra.lawrence/count2/Dppa2_Gata3/outs/filtered_feature_bc_matrix/")
ML8.data <- Read10X(data.dir = "/home/moyra.lawrence/count2/Dppa2_MERVL/outs/filtered_feature_bc_matrix/")
ML9.data <- Read10X(data.dir = "/home/moyra.lawrence/count2/Gata3_MERVL/outs/filtered_feature_bc_matrix/")
ML10.data <- Read10X(data.dir = "/home/moyra.lawrence/count2/Dppa2_Gata3_MERVL/outs/filtered_feature_bc_matrix/")


ML1 <- CreateSeuratObject(counts = ML1.data, project = "ML1")
ML2 <- CreateSeuratObject(counts = ML2.data, project = "ML2")
ML3 <- CreateSeuratObject(counts = ML3.data, project = "ML3")
ML4 <- CreateSeuratObject(counts = ML4.data, project = "ML4")
ML5 <- CreateSeuratObject(counts = ML5.data, project = "ML5")
ML6 <- CreateSeuratObject(counts = ML6.data, project = "ML6")
ML7 <- CreateSeuratObject(counts = ML7.data, project = "ML7")
ML8 <- CreateSeuratObject(counts = ML8.data, project = "ML8")
ML9 <- CreateSeuratObject(counts = ML9.data, project = "ML9")
ML10 <- CreateSeuratObject(counts = ML10.data, project = "ML10")

ML1
## An object of class Seurat 
## 32293 features across 4406 samples within 1 assay 
## Active assay: RNA (32293 features, 0 variable features)
ML2
## An object of class Seurat 
## 32293 features across 3977 samples within 1 assay 
## Active assay: RNA (32293 features, 0 variable features)
ML3
## An object of class Seurat 
## 32293 features across 4835 samples within 1 assay 
## Active assay: RNA (32293 features, 0 variable features)
ML4
## An object of class Seurat 
## 32293 features across 3501 samples within 1 assay 
## Active assay: RNA (32293 features, 0 variable features)
ML5
## An object of class Seurat 
## 32293 features across 4921 samples within 1 assay 
## Active assay: RNA (32293 features, 0 variable features)
ML6
## An object of class Seurat 
## 32293 features across 4179 samples within 1 assay 
## Active assay: RNA (32293 features, 0 variable features)
ML7
## An object of class Seurat 
## 32293 features across 5105 samples within 1 assay 
## Active assay: RNA (32293 features, 0 variable features)
ML8
## An object of class Seurat 
## 32293 features across 3117 samples within 1 assay 
## Active assay: RNA (32293 features, 0 variable features)
ML9
## An object of class Seurat 
## 32293 features across 4678 samples within 1 assay 
## Active assay: RNA (32293 features, 0 variable features)
ML10
## An object of class Seurat 
## 32293 features across 3210 samples within 1 assay 
## Active assay: RNA (32293 features, 0 variable features)

Preparing for filtering

ML1[["percent.mt"]] <- PercentageFeatureSet(ML1, pattern = "^mt-")
ML2[["percent.mt"]] <- PercentageFeatureSet(ML2, pattern = "^mt-")
ML3[["percent.mt"]] <- PercentageFeatureSet(ML3, pattern = "^mt-")
ML4[["percent.mt"]] <- PercentageFeatureSet(ML4, pattern = "^mt-")
ML5[["percent.mt"]] <- PercentageFeatureSet(ML5, pattern = "^mt-")
ML6[["percent.mt"]] <- PercentageFeatureSet(ML6, pattern = "^mt-")
ML7[["percent.mt"]] <- PercentageFeatureSet(ML7, pattern = "^mt-")
ML8[["percent.mt"]] <- PercentageFeatureSet(ML8, pattern = "^mt-")
ML9[["percent.mt"]] <- PercentageFeatureSet(ML9, pattern = "^mt-")
ML10[["percent.mt"]] <- PercentageFeatureSet(ML10, pattern = "^mt-")

Filtering conditions for all datasets apart from embryo data

3500 < nFeature_RNA
10,000 < nCount_RNA <100,000
0 =< percent.mt < 10

Filtering datasets

ML1.filtered <- subset(ML1, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML2.filtered <- subset(ML2, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML3.filtered <- subset(ML3, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML4.filtered <- subset(ML4, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML5.filtered <- subset(ML5, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML6.filtered <- subset(ML6, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML7.filtered <- subset(ML7, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML8.filtered <- subset(ML8, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML9.filtered <- subset(ML9, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)
ML10.filtered <- subset(ML10, subset = nFeature_RNA > 3500 & nCount_RNA > 10000 & nCount_RNA < 100000 & percent.mt < 10)

Merging data

merge.filtered <- merge(ML1.filtered, y = c(ML2.filtered, ML3.filtered, ML4.filtered, ML5.filtered, ML6.filtered, ML7.filtered, ML8.filtered, ML9.filtered, ML10.filtered, ML11), add.cell.ids = c("Empty_hygro_1", "Dppa2", "Gata3", "MERVL", "Empty_puro", "Empty_hygro_2", "Dppa2_Gata3", "Dppa2_MERVL", "Gata3_MERVL", "Dppa2_Gata3_MERVL", "Embryo"), project = "ML1")
merge.filtered
## An object of class Seurat 
## 36331 features across 34804 samples within 1 assay 
## Active assay: RNA (36331 features, 0 variable features)

Checking merge

head(colnames(merge.filtered))
## [1] "Empty_hygro_1_AAACCCAAGGAAGTAG-1" "Empty_hygro_1_AAACCCAGTCGCACGT-1"
## [3] "Empty_hygro_1_AAACCCAGTGTGCTTA-1" "Empty_hygro_1_AAACCCATCTGGTCAA-1"
## [5] "Empty_hygro_1_AAACGAAAGTAACCGG-1" "Empty_hygro_1_AAACGAAGTCCTGGTG-1"
tail(colnames(merge.filtered))
## [1] "Embryo_fibroblast-17-BxC" "Embryo_fibroblast-19-BxC"
## [3] "Embryo_fibroblast-20-BxC" "Embryo_fibroblast-21-BxC"
## [5] "Embryo_fibroblast-22-BxC" "Embryo_fibroblast-9-CxB"
unique(sapply(X = strsplit(colnames(merge.filtered), split = "_"), FUN = "[", 1))
## [1] "Empty"  "Dppa2"  "Gata3"  "MERVL"  "Embryo"
table(merge.filtered$orig.ident)
## 
##  ML1 ML10 ML11  ML2  ML3  ML4  ML5  ML6  ML7  ML8  ML9 
## 3664 2665  317 3153 3956 3047 4144 3565 4315 2090 3888
table(merge.filtered$in.vivo)
## 
##     16cell      4cell      8cell        BXC C57twocell early2cell earlyblast 
##         58         14         47         13          8          8         43 
## fibroblast  late2cell  lateblast   mid2cell   midblast     zygote 
##         10         10         30         12         60          4

Confirming filtering

vp1 <- VlnPlot(merge.filtered, pt.size=0, features = c("nFeature_RNA")) +  theme(legend.position = 'none')
vp2 <- VlnPlot(merge.filtered, pt.size=0, features = c("nCount_RNA")) +  theme(legend.position = 'none')
vp3 <- VlnPlot(merge.filtered, pt.size=0, features = c("percent.mt")) + theme(legend.position = 'none')
vp1.log <- VlnPlot(merge.filtered, pt.size=0, features = c("nFeature_RNA"),log=TRUE)+  theme(legend.position = 'none')
vp2.log <- VlnPlot(merge.filtered, pt.size=0, features = c("nCount_RNA"),log=TRUE)+  theme(legend.position = 'none')
vp3.log <- VlnPlot(merge.filtered, pt.size=0, features = c("percent.mt"),log=TRUE) +  theme(legend.position = 'none')
grid.arrange(vp1,vp2,vp3, nrow=1)
grid.arrange(vp1.log,vp2.log,vp3.log, nrow=1)
FeatureScatter(merge.filtered, "nCount_RNA", "percent.mt", pt.size=0.5) + scale_y_continuous(limits=c(0,100)) + scale_x_continuous(limits=c(0,100000))
FeatureScatter(merge.filtered, "nCount_RNA", "nFeature_RNA", pt.size=0.5) + scale_y_continuous(limits=c(0,12000)) + scale_x_continuous(limits=c(0,100000))

Splitting dataset

merge.list <- SplitObject(merge.filtered, split.by = "orig.ident")
merge.list <- lapply(X = merge.list, FUN = function(x) {
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, verbose = FALSE)
})

features <- SelectIntegrationFeatures(object.list = merge.list)
merge.list <- lapply(X = merge.list, FUN = function(x) {
    x <- ScaleData(x, features = features, verbose = FALSE)
    x <- RunPCA(x, features = features, verbose = FALSE)
})

Performing integration with embryo and WT ESCs as reference

anchors <- FindIntegrationAnchors(object.list = merge.list, reference = c(1, 5, 6, 11), reduction = "rpca",
    dims = 1:50 )
## Computing 2000 integration features
## Scaling features for provided objects
## Computing within dataset neighborhoods
## Finding anchors between all query and reference datasets
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 2065 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 858 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 1391 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 1802 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 1495 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 1109 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 1707 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 1484 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 1250 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 1083 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 1868 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 2174 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 1988 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 2964 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 2104 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 1583 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 2022 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 1858 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 1383 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 2058 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 1977 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 1835 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 2220 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 2095 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 201 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 167 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 175 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 164 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 177 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 165 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 187 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 176 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 192 anchors
## Projecting new data onto SVD
## Projecting new data onto SVD
## Finding neighborhoods
## Finding anchors
##  Found 160 anchors
totipotency <- IntegrateData(anchorset = anchors, dims = 1:50)
## Building integrated reference
## Merging dataset 11 into 1
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 6 into 5
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 1 11 into 5 6
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## 
## Integrating dataset 2 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## 
## Integrating dataset 3 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## 
## Integrating dataset 4 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## 
## Integrating dataset 7 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## 
## Integrating dataset 8 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## 
## Integrating dataset 9 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## 
## Integrating dataset 10 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data

#Save object

DefaultAssay(totipotency) <- "integrated"
save(totipotency, file="In_vivo_integrated3.rda")
#load(file='In_vivo_integrated3.rda')

Run the standard workflow for visualisation and clustering

Identify variable genes

totipotency <- FindVariableFeatures(totipotency, selection.method = "vst", nfeatures = 3000)
top10 <- head(VariableFeatures(totipotency), 10)
plot1 <- VariableFeaturePlot(totipotency)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE, xnudge=0, ynudge=0)
plot2

### Scaling

all.genes <- rownames(totipotency)
totipotency <- ScaleData(totipotency, features = all.genes)

#Omitted Normalization as this is an integrated object

PCA combined dataset

totipotency <- RunPCA(totipotency, npcs=100, features = rownames(totipotency))

print(totipotency[["pca"]], dims = 1:6, nfeatures = 5)
VizDimLoadings(totipotency, dims = 1:6, reduction = "pca")
DimPlot(totipotency, reduction = "pca")
DimHeatmap(totipotency, dims = 1:6, cells = 500, balanced = TRUE)
ElbowPlot(totipotency, ndims=100)

## UMAP resolution = 0.6 for totipotency

dim(totipotency)
#0.4-1.2 for resolution for 3000 cells, according to seurat tutorial
totipotency <- FindNeighbors(totipotency, dims = 1:50)
totipotency <- FindClusters(totipotency, resolution = 0.6)
totipotency <- RunUMAP(totipotency, dims = 1:50)

Original identity UMAP

p1 <- DimPlot(totipotency, reduction = "umap", group.by = "orig.ident")
p1

Clusters on UMAP

p2 <- DimPlot(totipotency, reduction = "umap", label = TRUE, repel = TRUE)
p2

Quality control

DefaultAssay(totipotency) <- "RNA"
totipotency <- NormalizeData(totipotency)
totipotency <- FindVariableFeatures(totipotency, selection.method = "vst")

Defining cell cycle genes

g2m_1.genes <-  c("Hmgb2","Cdk1","Nusap1","Ube2c","Birc5","Tpx2","Top2a","Ndc80","Cks2","Nuf2","Cks1b", "Mki67",   "Tmpo","Cenpf","Tacc3","Fam64a","Smc4","Ccnb2","Ckap2l","Ckap2","Aurkb","Bub1","Kif11","Anp32e","Tubb4b","Gtse1","Kif20b","Hjurp","Cdca3","Hn1","Cdc20","Ttk","Cdc25c","Kif2c","Rangap1","Ncapd2","Dlgap5","Cdca2","Cdca8","Ect2","Kif23","Hmmr","Aurka","Psrc1","Anln","Lbr","Ckap5","Cenpe","Ctcf","Nek2","G2e3","Gas2l3","Cbx5","Cenpa") 

s_1.genes <-  c("Mcm5","Pcna","Tyms","Fen1","Mcm2","Mcm4","Rrm1","Ung","Gins2","Mcm6","Cdca7","Dtl","Prim1","Uhrf1","Mlf1ip","Hells","Rfc2","Rpa2","Nasp","Rad51ap1","Gmnn","Wdr76","Slbp","Ccne2","Ubr7","Pold3","Msh2","Atad2","Rad51","Rrm2","Cdc45","Cdc6","Exo1","Tipin","Dscc1","Blm","Casp8ap2","Usp1","Clspn","Pola1","Chaf1b","Brip1","E2f8")   

Assigning cell cycle score

totipotency <- CellCycleScoring(totipotency, s.features = s_1.genes, g2m.features = g2m_1.genes, set.ident = TRUE)

# view cell cycle scores and phase assignments
head(totipotency[[]])
##                                  orig.ident nCount_RNA nFeature_RNA percent.mt
## Empty_hygro_1_AAACCCAAGGAAGTAG-1        ML1      20184         4191   4.231074
## Empty_hygro_1_AAACCCAGTCGCACGT-1        ML1      26366         5162   3.986194
## Empty_hygro_1_AAACCCAGTGTGCTTA-1        ML1      54111         7691   4.477833
## Empty_hygro_1_AAACCCATCTGGTCAA-1        ML1      28283         5743   4.009476
## Empty_hygro_1_AAACGAAAGTAACCGG-1        ML1      30028         5260   4.878780
## Empty_hygro_1_AAACGAAGTCCTGGTG-1        ML1      19053         4860   4.734163
##                                  in.vivo integrated_snn_res.0.6 seurat_clusters
## Empty_hygro_1_AAACCCAAGGAAGTAG-1    <NA>                      2               2
## Empty_hygro_1_AAACCCAGTCGCACGT-1    <NA>                      2               2
## Empty_hygro_1_AAACCCAGTGTGCTTA-1    <NA>                      9               9
## Empty_hygro_1_AAACCCATCTGGTCAA-1    <NA>                      0               0
## Empty_hygro_1_AAACGAAAGTAACCGG-1    <NA>                      0               0
## Empty_hygro_1_AAACGAAGTCCTGGTG-1    <NA>                      1               1
##                                      S.Score    G2M.Score Phase old.ident
## Empty_hygro_1_AAACCCAAGGAAGTAG-1 -0.06194625 -0.306587988    G1         2
## Empty_hygro_1_AAACCCAGTCGCACGT-1  0.13928402 -0.313254895     S         2
## Empty_hygro_1_AAACCCAGTGTGCTTA-1  0.08786819 -0.210559821     S         9
## Empty_hygro_1_AAACCCATCTGGTCAA-1 -0.03951238 -0.109141726    G1         0
## Empty_hygro_1_AAACGAAAGTAACCGG-1 -0.19186304 -0.017738155    G1         0
## Empty_hygro_1_AAACGAAGTCCTGGTG-1 -0.17219763 -0.005161036    G1         1

Plotting cell cycle score on UMAP

DimPlot(totipotency, reduction = "umap")

## Scaling out cell cycle

DefaultAssay(totipotency) <- "integrated"
totipotency <- ScaleData(totipotency, vars.to.regress=c("S.Score","G2M.Score"))
## Regressing out S.Score, G2M.Score
## Centering and scaling data matrix
save(totipotency, file="In_vivo_cell_cycle_corrected3.rda")
#load(file='In_vivo_cell_cycle_corrected3.rda')

Rerun PCA, UMAP and clustering

totipotency <- RunPCA(totipotency, npcs = 100, verbose = FALSE)
totipotency <- RunUMAP(totipotency, reduction = "pca", dims = 1:30)
## 18:26:40 UMAP embedding parameters a = 0.9922 b = 1.112
## 18:26:40 Read 34804 rows and found 30 numeric columns
## 18:26:40 Using Annoy for neighbor search, n_neighbors = 30
## 18:26:40 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 18:26:44 Writing NN index file to temp file /tmp/RtmpIbyjqn/file2ddd5403e2bc0
## 18:26:44 Searching Annoy index using 1 thread, search_k = 3000
## 18:26:58 Annoy recall = 100%
## 18:26:58 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 18:27:01 Initializing from normalized Laplacian + noise (using irlba)
## 18:27:02 Commencing optimization for 200 epochs, with 1561036 positive edges
## 18:27:20 Optimization finished
totipotency <- FindNeighbors(totipotency, dims = 1:50)
## Computing nearest neighbor graph
## Computing SNN
totipotency <- FindClusters(totipotency, resolution = 0.6)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 34804
## Number of edges: 1030551
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8245
## Number of communities: 17
## Elapsed time: 6 seconds
## 2 singletons identified. 15 final clusters.
DimPlot(totipotency, reduction = "umap", label = TRUE, repel = TRUE)

# Integrated analysis ### Identify variable genes 2

tot <- FindVariableFeatures(totipotency, selection.method = "vst", nfeatures = 3000)
top10_2 <- head(VariableFeatures(tot), 10)
plot1 <- VariableFeaturePlot(tot)
plot2 <- LabelPoints(plot = plot1, points = top10_2, repel = TRUE, xnudge=0, ynudge=0)
plot2

# Plot PCA2

print(totipotency[["pca"]], dims = 1:6, nfeatures = 5)
## PC_ 1 
## Positive:  Exoc4, Dst, Ranbp17, Trip12, Srgap2 
## Negative:  Cdc20, Tubb4b, Ube2c, Ldha, Aprt 
## PC_ 2 
## Positive:  Phlda3, Btg2, Sdc4, Plk2, Ptpn14 
## Negative:  Enox1, Adam23, Ntn1, Enah, Gli2 
## PC_ 3 
## Positive:  Hist1h2ae, Ano3, Ddit4l, Ptp4a3, Trp53inp1 
## Negative:  Col4a2, Col4a1, Lamb1, Srgn, Dab2 
## PC_ 4 
## Positive:  Svop, Phlda3, D630023F18Rik, Cdkn1a, Trp53inp1 
## Negative:  Arg2, Platr31, Wee2, Tcl1b4, Gm11487 
## PC_ 5 
## Positive:  Hist1h2ae, Hist1h1b, Hist1h1e, Hist1h2ap, 2410006H16Rik 
## Negative:  Tcf15, Nanog, Tmsb4x, Nfib, Nr5a2 
## PC_ 6 
## Positive:  Ptch1, Nrp2, Utrn, Map4k4, Rragd 
## Negative:  Gabarapl2, Mylpf, Tbx3, Ankrd33b, Sept1
VizDimLoadings(totipotency, dims = 1:6, reduction = "pca")

DimPlot(totipotency, reduction = "pca")

DimHeatmap(totipotency, dims = 1:6, cells = 500, balanced = TRUE)

ElbowPlot(totipotency, ndims=100)

# Quantifying cells in new clusters

tbl_cell <- table(totipotency@active.ident, totipotency@meta.data$orig.ident)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add %>% gt(rowname_col="rownames") %>% tab_header(title = "Libraries per cluster")
Libraries per cluster
ML1 ML10 ML11 ML2 ML3 ML4 ML5 ML6 ML7 ML8 ML9
0 1090 (29.7%) 780 (29.3%) 5 (1.6%) 1000 (31.7%) 647 (16.4%) 811 (26.6%) 975 (23.5%) 951 (26.7%) 1069 (24.8%) 583 (27.9%) 1043 (26.8%)
1 869 (23.7%) 585 (22%) 11 (3.5%) 757 (24%) 496 (12.5%) 716 (23.5%) 883 (21.3%) 841 (23.6%) 903 (20.9%) 432 (20.7%) 812 (20.9%)
2 416 (11.4%) 283 (10.6%) 2 (0.6%) 383 (12.1%) 350 (8.8%) 346 (11.4%) 397 (9.6%) 293 (8.2%) 510 (11.8%) 225 (10.8%) 546 (14%)
3 391 (10.7%) 369 (13.8%) 6 (1.9%) 291 (9.2%) 301 (7.6%) 341 (11.2%) 397 (9.6%) 452 (12.7%) 473 (11%) 257 (12.3%) 404 (10.4%)
4 165 (4.5%) 185 (6.9%) 1 (0.3%) 167 (5.3%) 245 (6.2%) 309 (10.1%) 255 (6.2%) 323 (9.1%) 242 (5.6%) 133 (6.4%) 316 (8.1%)
5 186 (5.1%) 98 (3.7%) 0 (0%) 85 (2.7%) 40 (1%) 127 (4.2%) 761 (18.4%) 250 (7%) 435 (10.1%) 82 (3.9%) 76 (2%)
6 143 (3.9%) 180 (6.8%) 0 (0%) 108 (3.4%) 179 (4.5%) 172 (5.6%) 291 (7%) 282 (7.9%) 235 (5.4%) 125 (6%) 211 (5.4%)
7 15 (0.4%) 6 (0.2%) 117 (36.9%) 7 (0.2%) 1371 (34.7%) 49 (1.6%) 9 (0.2%) 26 (0.7%) 68 (1.6%) 3 (0.1%) 38 (1%)
8 306 (8.4%) 50 (1.9%) 0 (0%) 247 (7.8%) 63 (1.6%) 86 (2.8%) 58 (1.4%) 30 (0.8%) 99 (2.3%) 26 (1.2%) 25 (0.6%)
9 77 (2.1%) 83 (3.1%) 0 (0%) 80 (2.5%) 6 (0.2%) 74 (2.4%) 96 (2.3%) 105 (2.9%) 202 (4.7%) 91 (4.4%) 73 (1.9%)
10 2 (0.1%) 0 (0%) 4 (1.3%) 0 (0%) 210 (5.3%) 0 (0%) 0 (0%) 0 (0%) 16 (0.4%) 0 (0%) 293 (7.5%)
11 1 (0%) 44 (1.7%) 42 (13.2%) 28 (0.9%) 42 (1.1%) 14 (0.5%) 3 (0.1%) 1 (0%) 59 (1.4%) 106 (5.1%) 46 (1.2%)
12 0 (0%) 0 (0%) 118 (37.2%) 0 (0%) 4 (0.1%) 2 (0.1%) 1 (0%) 1 (0%) 2 (0%) 0 (0%) 2 (0.1%)
13 3 (0.1%) 2 (0.1%) 3 (0.9%) 0 (0%) 2 (0.1%) 0 (0%) 18 (0.4%) 10 (0.3%) 2 (0%) 27 (1.3%) 3 (0.1%)
14 0 (0%) 0 (0%) 8 (2.5%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)

Quantifying embryo cells in new clusters

tbl_cell <- table(totipotency@active.ident, totipotency@meta.data$in.vivo)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add %>% gt(rowname_col="rownames") %>% tab_header(title = "Embryo cells per cluster")
Embryo cells per cluster
16cell 4cell 8cell BXC C57twocell early2cell earlyblast fibroblast late2cell lateblast mid2cell midblast zygote
0 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (10%) 0 (0%) 1 (3.3%) 0 (0%) 3 (5%) 0 (0%)
1 0 (0%) 0 (0%) 0 (0%) 5 (38.5%) 0 (0%) 0 (0%) 4 (9.3%) 1 (10%) 0 (0%) 1 (3.3%) 0 (0%) 0 (0%) 0 (0%)
2 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 2 (3.3%) 0 (0%)
3 1 (1.7%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (2.3%) 4 (40%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
4 1 (1.7%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
5 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
6 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
7 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 35 (81.4%) 4 (40%) 0 (0%) 24 (80%) 0 (0%) 54 (90%) 0 (0%)
8 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
9 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
10 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 4 (13.3%) 0 (0%) 0 (0%) 0 (0%)
11 0 (0%) 0 (0%) 0 (0%) 0 (0%) 8 (100%) 8 (100%) 0 (0%) 0 (0%) 10 (100%) 0 (0%) 12 (100%) 0 (0%) 4 (100%)
12 53 (91.4%) 14 (100%) 47 (100%) 0 (0%) 0 (0%) 0 (0%) 3 (7%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (1.7%) 0 (0%)
13 3 (5.2%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
14 0 (0%) 0 (0%) 0 (0%) 8 (61.5%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)

Totipotency, pluripotency and PE markers in each cluster

Find markers for every new cluster 2

DefaultAssay(totipotency) <- "RNA"
mus.markers <- FindAllMarkers(totipotency, only.pos = TRUE, min.pct =0.1, logfc.threshold = 0.25)
write.xlsx(mus.markers, file = 'In_vivo_markers3.xlsx')
mus.markers %>% group_by(cluster) %>% top_n(n = 4, wt = avg_log2FC)

RNA slot of new object

totipotency <- ScaleData(totipotency, features = all.genes)
VlnPlot(totipotency, features = features_vec, pt.size=0)

#Heatmap

top10 <- mus.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
DoHeatmap(totipotency, features = top10$gene) + NoLegend() + theme(axis.text.y = element_text(size = 4.5))
## Warning in DoHeatmap(totipotency, features = top10$gene): The following features
## were omitted as they were not found in the scale.data slot for the RNA assay:
## Trf, Ahsg, Apoa2, Apoa1, Gc, Sepp1, Fgb, Serpina1b, Serpina3k, Alb, Hprt,
## Isyna1, Mdh2, Eno1, Ppia, Pdxk, Gpd1l, Gnb2l1, Gm11517, Tceb1, Alppl2, Timd2,
## Spin1, LOC100502936, H3f3b, Gas5, Mat2a, Dynll1, Cks2, Cotl1, Clspn, Usp37,
## Hells, Ung, Cdc6, Slbp, Rrm2, Dtl, Ftl1, Prdx1, Gsta4, H2afz

Original identity on new UMAP

p1 <- DimPlot(totipotency, reduction = "umap", group.by = "orig.ident")
DimPlot(totipotency, reduction = "umap", label = TRUE, repel = TRUE)

p1

## Cell types from in vivo data

p1 <- DimPlot(totipotency, reduction = "umap", group.by = "in.vivo")
p1

Final classification

new.cluster.ids <- c("Pluripotent", "Pluripotent", "Pluripotent", "Pluripotent", "Pluripotent","Pluripotent", "Pluripotent", "Blastocyst, fibroblast", "Pluripotent", "Pluripotent", "Primitive Endoderm", "Totipotent, 2C, zygote", "4.8.16 cell", "Pluripotent", "Liver")
names(new.cluster.ids) <- levels(totipotency)
totipotency <- RenameIdents(totipotency, new.cluster.ids)
DimPlot(totipotency, reduction = "umap", label = TRUE, pt.size = 0.5, repel = TRUE) + NoLegend()

Libraries per cluster

tbl_cell <- table(totipotency@active.ident, totipotency@meta.data$orig.ident)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add %>% gt(rowname_col="rownames") %>% tab_header(title = "Cluster_distribution")
Cluster_distribution
ML1 ML10 ML11 ML2 ML3 ML4 ML5 ML6 ML7 ML8 ML9
Pluripotent 3646 (99.5%) 2615 (98.1%) 28 (8.8%) 3118 (98.9%) 2329 (58.9%) 2982 (97.9%) 4131 (99.7%) 3537 (99.2%) 4170 (96.6%) 1981 (94.8%) 3509 (90.3%)
Blastocyst, fibroblast 15 (0.4%) 6 (0.2%) 117 (36.9%) 7 (0.2%) 1371 (34.7%) 49 (1.6%) 9 (0.2%) 26 (0.7%) 68 (1.6%) 3 (0.1%) 38 (1%)
Primitive Endoderm 2 (0.1%) 0 (0%) 4 (1.3%) 0 (0%) 210 (5.3%) 0 (0%) 0 (0%) 0 (0%) 16 (0.4%) 0 (0%) 293 (7.5%)
Totipotent, 2C, zygote 1 (0%) 44 (1.7%) 42 (13.2%) 28 (0.9%) 42 (1.1%) 14 (0.5%) 3 (0.1%) 1 (0%) 59 (1.4%) 106 (5.1%) 46 (1.2%)
4.8.16 cell 0 (0%) 0 (0%) 118 (37.2%) 0 (0%) 4 (0.1%) 2 (0.1%) 1 (0%) 1 (0%) 2 (0%) 0 (0%) 2 (0.1%)
Liver 0 (0%) 0 (0%) 8 (2.5%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)

Embryo cells per cluster

tbl_cell <- table(totipotency@active.ident, totipotency@meta.data$in.vivo)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add %>% gt(rowname_col="rownames") %>% tab_header(title = "Cluster_distribution")
Cluster_distribution
16cell 4cell 8cell BXC C57twocell early2cell earlyblast fibroblast late2cell lateblast mid2cell midblast zygote
Pluripotent 5 (8.6%) 0 (0%) 0 (0%) 5 (38.5%) 0 (0%) 0 (0%) 5 (11.6%) 6 (60%) 0 (0%) 2 (6.7%) 0 (0%) 5 (8.3%) 0 (0%)
Blastocyst, fibroblast 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 35 (81.4%) 4 (40%) 0 (0%) 24 (80%) 0 (0%) 54 (90%) 0 (0%)
Primitive Endoderm 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 4 (13.3%) 0 (0%) 0 (0%) 0 (0%)
Totipotent, 2C, zygote 0 (0%) 0 (0%) 0 (0%) 0 (0%) 8 (100%) 8 (100%) 0 (0%) 0 (0%) 10 (100%) 0 (0%) 12 (100%) 0 (0%) 4 (100%)
4.8.16 cell 53 (91.4%) 14 (100%) 47 (100%) 0 (0%) 0 (0%) 0 (0%) 3 (7%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (1.7%) 0 (0%)
Liver 0 (0%) 0 (0%) 0 (0%) 8 (61.5%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)

Split plot per library

DimPlot(totipotency, reduction = "umap", split.by = "orig.ident", ncol = 3)

Save data as In_vivo_final3

save(totipotency, file="In_vivo_final3.rda")

Plotting cell cycle on new UMAP

DefaultAssay(totipotency) <- "RNA"
totipotency <- CellCycleScoring(totipotency, s.features = s_1.genes, g2m.features = g2m_1.genes, set.ident = TRUE)
DimPlot(totipotency, reduction = "umap")

#Cell cycle bar chart

totipotency@meta.data %>% ggplot(aes(x=old.ident,fill=Phase)) +
  geom_bar(position = "fill",size = 3, width = .8) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(x = "", y = "Cell fraction")

#Feature plot

FeaturePlot(totipotency, features = "Arg2")